# Load packages
library(sf)
library(xml2)
library(tidyverse)
library(tidybayes)
library(brms)
library(vegan)
# Define genus level taxon groups (plus one family FAVI)
taxon_groups <- list(
PORI = c("PPOR", "PFUR", "PDIV", "PAST", "PORI"),
ORBI = c("OFAV", "OANN", "OFRA", "ORBI"),
FAVI = c("CNAT", "DLAB", "PSTR", "PCLI", "MARE", "FAVI"),
AGAR = c("AFRA", "AAGA", "AHUM", "ALAM", "AGAR"),
MADR = c("MAUR", "MSEN", "MDEC", "MPHA", "MADR"),
SOLE = c("SHYA", "SBOU", "SOLE"),
SCOL = c("SLAC", "SCUB", "SCOL"),
SIDE = c("SSID", "SRAD", "SIDE"),
MYCE = c("MFER", "MLAM", "MALI", "MYCE"),
OCUL = c("OROB", "ODIF", "OCUL")
)
# Convert to lookup tibble
taxon_lookup <- enframe(taxon_groups, name = "taxon_group", value = "taxon") %>%
unnest(taxon)
# Define juvenile family level taxon groups (following DRM survey convention)
taxon_groups_juv <- list(
MUSS = c("ISIN", "ISOP", "MANG", "MYCE", "SCOL", "MUSS"),
FAVI = c("FAVI", "FFRA", "MARE"),
MEAN = c("MMEA", "MEAN", "DCYL", "DSTO", "EFAS")
)
# Convert to lookup tibble
taxon_lookup_juv <- enframe(taxon_groups_juv, name = "taxon_group", value = "taxon") %>%
unnest(taxon)
# Order levels of Habitat Types
type_levels <- c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
"Outer Reef", "Aggregated Patch Reef")
type_labels <- c("NRC", "IR", "MR", "OR", "APR")
names(type_labels) <- type_levels
# Order survey datasets
dataset_levels <- c("dca17", "tt21", "tt23", "drm24")
dataset_labels <- c("2017—DCA", "2021—TT", "2023—TT", "2024—Shedd")
names(dataset_labels) <- dataset_levels
# Import data
nsu11_esa0 <- readxl::read_xlsx("data/2011_nsu_esa/Port Everglades_NSU 2011and DCA 2017_ESA surveys.xlsx",
sheet = "2011_NSU_ESA survey") %>%
janitor::clean_names()
# Site metadata
nsu11_esa_sitemd <- nsu11_esa0 %>%
select(site = ident, latitude = lat, longitude = long) %>%
mutate(site = as.character(site))
# ESA coral count data
nsu11_esa_counts <- nsu11_esa0 %>%
select(site = ident,
ACER = a_cervic_1, OANN = m_annula_1, OFAV = m_faveol_1, OFRA = m_franks_1, MFER = m_ferox_1) %>%
pivot_longer(-site, names_to = "taxon", values_to = "n") %>%
# Assume all were >4cm since no sizes are reported
mutate(class = ">4cm") %>%
mutate(site = as.character(site))
# Aggregate taxa (multiple orbicella observed --> ORBI)
nsu11_esa_counts_ag <- nsu11_esa_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
library(sf)
# Read KML file
dca17_esa_sitemd0 <- st_read("data/2017_dca_esa/Listed_Coral_Survey_2017.kml")
## Reading layer `Listed_Coral_Survey_2017' from data source
## `/Users/rosscunning/Projects/PENIP/data/2017_dca_esa/Listed_Coral_Survey_2017.kml'
## using driver `KML'
## Simple feature collection with 149 features and 2 fields
## Geometry type: POINT
## Dimension: XYZ
## Bounding box: xmin: -80.10416 ymin: 26.08429 xmax: -80.08178 ymax: 26.10336
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# Extract coordinates and save as CSV
dca17_esa_sitemd <- dca17_esa_sitemd0 %>%
mutate(site = Name,
longitude = st_coordinates(.)[, 1],
latitude = st_coordinates(.)[, 2]) %>%
st_drop_geometry() %>%
select(site, longitude, latitude) %>%
as_tibble() %>%
mutate(site = as.character(site))
# Get coral data
# Appendix C -- does not contain individual sizes
# dca17_esa0 <- readxl::read_xlsx("data/2017_dca_esa/Appendix C_DCA2017_survey_results_FINAL.xlsx") %>%
# janitor::clean_names()
# dca17_esa0 %>%
# select(site = esa_site, ACER = a_cervicornis_n, OFAV = o_faveolata_n,
# area_m2 = habitat_surveyed_m2_based_on_walker_and_klug_2014)
# Appendix D -- contains individual colony sizes
# what is the "NUMBER" column? ignore for now.
dca17_esa0 <- readxl::read_xlsx("data/2017_dca_esa/Appendix D_ESA_listed_coral_Plotted_Locations.xlsx") %>%
janitor::clean_names() %>%
select(site = site_id, taxon = species, length_cm, width_cm, height_cm, m2_of_habi, density) %>%
mutate(across(ends_with("cm"), as.numeric))
# Assign size classes
dca17_esa <- dca17_esa0 %>%
mutate(max_dim_cm = pmax(length_cm, width_cm, height_cm, na.rm = TRUE),
class = if_else(max_dim_cm >= 4, ">4cm", "<4cm"))
# Count taxon and size class per site
dca17_esa_counts <- dca17_esa %>%
count(site, taxon, class) %>%
mutate(site = as.character(site))
# Total area surveyed per site = 784 m2 (crossed 100x4m belt transects with 16m2 overlap)
# Agreggate taxa (OFAV -> ORBI)
dca17_esa_counts_ag <- dca17_esa_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
DCA site metadata
# Site metadata
# Read in site coordinates
dca17_sitemd0 <- readxl::read_xlsx("data/2017_dca_recon/Recon_Site_Coordinates_Extracted.xlsx") %>%
janitor::clean_names()
# All sites have start and end coordinates...
# Tidy and Calculate midpoint per transect
dca17_sitemd <- dca17_sitemd0 %>%
mutate(
depth = abs(as.numeric(depth)),
across(c(latitude, longitude), as.numeric)
) %>%
group_by(site = transect) %>%
summarize(
latitude = mean(latitude, na.rm = TRUE),
longitude = mean(longitude, na.rm = TRUE),
depth = mean(depth, na.rm = TRUE),
.groups = "drop"
)
DCA coral data
# Read in survey data
dca170 <- readxl::read_xlsx("data/2017_dca_recon/Compiled_DCA_RECON_Belt_data.xlsx") %>%
janitor::clean_names()
dca17 <- dca170 %>%
select(1:18) %>%
rename(site = site_name) %>%
mutate(site = factor(site)) %>%
select(site, taxon = coral_species, max_width_cm = max_size_cm)
# Adjust/corrects species IDs
sort(unique(dca17$taxon))
## [1] "AAGA" "ACER" "AFRA"
## [4] "AGA SP" "AHUM" "ALAM"
## [7] "CNAT" "CORAL" "Cup Coral"
## [10] "DLAB" "DSTO" "EFAS"
## [13] "LCUC" "MAD SP" "MADSP"
## [16] "MALI" "MARE" "MCAV"
## [19] "MDEC" "MLAM" "MMEA"
## [22] "MPHA" "Mycetophyllia spp." "MYCSP"
## [25] "OANN" "ODIF" "OFAV"
## [28] "OFAV\\" "PAST" "PCLI"
## [31] "PFUR" "PPOR" "PSTR"
## [34] "SBOU" "Scolymia Spp" "SCUB"
## [37] "Sid SP" "SID SP" "SID SP."
## [40] "SIDSP" "SINT" "SRAD"
## [43] "SSID"
dca17 <- dca17 %>%
mutate(taxon = case_when(
taxon == "AGA SP" ~ "AGAR",
taxon == "LCUC" ~ "HCUC",
taxon %in% c("MYCSP", "Mycetophyllia spp.") ~ "MYCE",
taxon == "OFAV\\" ~ "OFAV",
taxon %in% c("MAD SP", "MADSP") ~ "MADR",
taxon == "Scolymia Spp" ~ "SCOL",
taxon %in% c("SIDSP", "Sid SP", "SID SP.", "SID SP") ~ "SIDE",
TRUE ~ taxon
))
sort(unique(dca17$taxon))
## [1] "AAGA" "ACER" "AFRA" "AGAR" "AHUM" "ALAM"
## [7] "CNAT" "CORAL" "Cup Coral" "DLAB" "DSTO" "EFAS"
## [13] "HCUC" "MADR" "MALI" "MARE" "MCAV" "MDEC"
## [19] "MLAM" "MMEA" "MPHA" "MYCE" "OANN" "ODIF"
## [25] "OFAV" "PAST" "PCLI" "PFUR" "PPOR" "PSTR"
## [31] "SBOU" "SCOL" "SCUB" "SIDE" "SINT" "SRAD"
## [37] "SSID"
# Filter out unidentified corals
dca17 <- dca17 %>%
filter(!taxon %in% c("CORAL", "Cup Coral"))
# Write long data to file
write_csv(dca17, file = "data/processed/dca_2017_long.csv")
# Convert to count data
# Add explicit zeros for any taxon/size class missing at any site
dca17_counts <- dca17 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# # Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(dca17_counts, file = "data/processed/dca_2017_counts.csv")
# Aggregate count data based on taxonomic groups defined above
dca17_counts_ag <- dca17_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
dca17_counts_ag <- dca17_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(dca17_counts_ag, file = "data/processed/dca_2017_counts_ag.csv")
2021 TT site metadata
# site metadata
tt21_sitemd0 <- read_csv("data/2021_tt_recon_esa/midpoints_latlon.csv") %>%
janitor::clean_names()
tt21_sitemd <- tt21_sitemd0 %>%
mutate(name = str_remove(name, "A$")) %>%
select(site = name, longitude = lon, latitude = lat)
# There was a lot of sand in these transects that was quantified in final RECON report (though these were the same transects for the ESA and RECON datasets). Extracted this data from RECON report, Table 2:
tt21_sand <- read_csv("data/2021_tt_recon_esa/Table_2_Port_Everglades_RECON.csv") %>%
janitor::clean_names()
tt21_m_nosand <- tt21_sand %>%
mutate(area_m2 = 30 - meters_of_sc_sp) %>%
mutate(site = as.character(site))
2021 TT coral data
# coral data - recon belt transects
tt21recon0 <- readxl::read_xlsx("data/2021_tt_recon_esa/Recon 30x1m Coral Belt Transect.xlsx") %>%
janitor::clean_names()
tt21recon <- tt21recon0 %>%
select(site, taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
mutate(taxon = toupper(taxon), site = factor(site)) %>%
mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# ESA survey data
tt21esa0 <- readxl::read_xlsx("data/2021_tt_recon_esa/ESA Coral Belt Transect.xlsx") %>%
janitor::clean_names()
sort(unique(tt21esa0$esa_id))
## [1] "0.0" "Acropora cervicornis" "ML QC Not ESA"
## [4] "Orbicella faveolata" "Orbicella franksi" "Outside Belt"
tt21esa <- tt21esa0 %>%
mutate(site = factor(site),
taxon = case_when(
esa_id == "Orbicella franksi" ~ "OFRA",
esa_id == "Orbicella faveolata" ~ "OFAV",
esa_id == "Acropora cervicornis" ~ "ACER")) %>%
filter(!is.na(taxon)) %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# Combine Recon and ESA survey data
tt21 <- bind_rows(tt21recon, tt21esa)
# Check taxa names
sort(unique(tt21recon$taxon))
## [1] "0" "AAGA"
## [3] "AFRAG" "ALAM"
## [5] "CNAT" "DLAB"
## [7] "DSTO" "EFAS"
## [9] "FFRAG" "JUVENILE-UNIDENTIFIABLE"
## [11] "MALC" "MANG"
## [13] "MCAV" "MDEC"
## [15] "MMEA" "MPHA"
## [17] "MYALI" "MYLAM"
## [19] "ODIF/OROB" "PAST"
## [21] "PDCLIV" "PDSTR"
## [23] "PHYLLANGIA AMERICANA" "PPOR"
## [25] "SBOU" "SCOLYMIA CUBENSIS"
## [27] "SCOLYMIA LACERA" "SINT"
## [29] "SRAD" "SSID"
## [31] "XESTO"
# Filter out unidentified OR NON-CORAL taxa
tt21 <- tt21 %>%
filter(!taxon %in% c("0", "JUVENILE-UNIDENTIFIABLE", "XESTO", "MALC"))
# Adjust/corrects species IDs
tt21 <- tt21 %>%
mutate(taxon = case_when(
taxon == "AFRAG" ~ "AFRA",
taxon == "FFRAG" ~ "FFRA",
taxon == "MYALI" ~ "MALI",
taxon == "MYFER" ~ "MFER",
taxon == "MYLAM" ~ "MLAM",
taxon == "ODIF/OROB" ~ "OCUL",
taxon == "PDCLIV" ~ "PCLI",
taxon == "PDSTR" ~ "PSTR",
taxon == "PHYLLANGIA AMERICANA" ~ "PAME",
taxon == "SCOLYMIA CUBENSIS" ~ "SCUB",
taxon == "SCOLYMIA LACERA" ~ "SLAC",
TRUE ~ taxon
))
sort(unique(tt21$taxon))
## [1] "AAGA" "ACER" "AFRA" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA" "MALI"
## [11] "MANG" "MCAV" "MDEC" "MLAM" "MMEA" "MPHA" "OCUL" "OFAV" "OFRA" "PAME"
## [21] "PAST" "PCLI" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt21, file = "data/processed/tt_2021_long.csv")
# Count
# Add explicit zeros for any taxon/size class missing at any site
tt21_counts <- tt21 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(tt21_counts, file = "data/processed/tt_2021_counts.csv")
# Aggregate count data based on taxonomic groups defined above
tt21_counts_ag <- tt21_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
tt21_counts_ag <- tt21_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(tt21_counts_ag, file = "data/processed/tt21_counts_ag.csv")
TT site metadata
# Site metadata
tt23_sitemd <- readxl::read_xlsx("data/2023_tt_impact/Impact site tracking.xlsx", skip = 1) %>%
janitor::clean_names()
tt23_sitemd <- tt23_sitemd %>%
mutate(site = transect_name,
latitude = as.numeric(actual_start_y),
longitude = as.numeric(actual_start_x)) %>%
select(site, latitude, longitude)
# Many sites missing coords in sheet.... whats up with that
tt23_sitemd <- drop_na(tt23_sitemd, longitude)
TT coral data
tt230 <- readxl::read_xlsx("data/2023_tt_impact/Impact Raw Data 05 31 2024.xlsx") %>%
janitor::clean_names()
tt23 <- tt230 %>%
select(site = transect_name, depth_ft_start,
taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
filter(taxon != "Xesto") %>%
mutate(taxon = toupper(taxon)) %>%
mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
mutate(site = factor(site))
tt23 <- tt23 %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# Check taxa names
sort(unique(tt23$taxon))
## [1] "?" "AAGA" "ACER" "AFRA" "AFRAG" "ALAM"
## [7] "ASP" "ASP." "CNAT" "DLAB" "DSTO" "EFAS"
## [13] "FFRA" "HCUC" "ID-ABBREV" "MCAV" "MCAV?" "MDEC"
## [19] "MHEARD" "MMEA" "MSEN" "MSP." "MUSSID" "MYALI"
## [25] "MYFER" "MYLAM" "NONE" "OFAV" "OFR" "OFRA"
## [31] "OROB" "PAME" "PAST" "PCLI" "PCLI?" "PDIV"
## [37] "PPOR" "PSP" "PSP." "PSTR" "SBOU" "SCUB"
## [43] "SHYA" "SINT" "SLAC" "SRAD" "SSID" "SSP."
## [49] "STOK"
# Filter out unidentified taxa
tt23 <- tt23 %>%
filter(!taxon %in% c("?", "ID-ABBREV", "NONE", "MHEARD"))
# Adjust/corrects species IDs
tt23 <- tt23 %>%
mutate(taxon = case_when(
taxon == "AFRAG" ~ "AFRA",
taxon %in% c("ASP", "ASP.") ~ "AGAR",
taxon == "MCAV?" ~ "MCAV",
taxon == "MSP." ~ "MADR",
taxon == "MUSSID" ~ "MUSS",
taxon == "MYALI" ~ "MALI",
taxon == "MYFER" ~ "MFER",
taxon == "MYLAM" ~ "MLAM",
taxon == "OFR" ~ "OFRA",
taxon == "PCLI?" ~ "PCLI",
taxon %in% c("PSP", "PSP.") ~ "PORI",
taxon == "SSP." ~ "SIDE",
taxon == "STOK" ~ "DSTO",
TRUE ~ taxon
))
sort(unique(tt23$taxon))
## [1] "AAGA" "ACER" "AFRA" "AGAR" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA"
## [11] "HCUC" "MADR" "MALI" "MCAV" "MDEC" "MFER" "MLAM" "MMEA" "MSEN" "MUSS"
## [21] "OFAV" "OFRA" "OROB" "PAME" "PAST" "PCLI" "PDIV" "PORI" "PPOR" "PSTR"
## [31] "SBOU" "SCUB" "SHYA" "SIDE" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt23, file = "data/processed/tt_2024_long.csv")
# Count
# Add explicit zeros for any taxon/size class missing at any site
tt23_counts <- tt23 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(tt23_counts, file = "data/processed/tt_2024_counts.csv")
# Aggregate count data based on taxonomic groups defined above
tt23_counts_ag <- tt23_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
tt23_counts_ag <- tt23_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(tt23_counts_ag, file = "data/processed/tt23_counts_ag.csv")
Shedd site metadata
# Site metadata
drm24_sitemd <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
janitor::clean_names() %>%
mutate(site = as.character(drm_site_id)) %>%
select(site, latitude = lat, longitude = lon) %>%
mutate(depth = NA)
Shedd coral data
# Adult coral data from main DRM surveys
#adults0 <- read_csv("data/2024_shedd_drm/DRM_broward_corals.csv")
#adults0 %>% filter(team == "Shedd Aquarium")
alldrm2024 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect1and2_Shedd.xlsx") %>%
janitor::clean_names() %>%
distinct(site, team, date, subregion)
shedddrm2024 <- alldrm2024 %>% filter(team == "Shedd Aquarium")
shedddrm2024 %>%
count(subregion, date)
## # A tibble: 19 × 3
## subregion date n
## <chr> <dttm> <int>
## 1 Broward-Miami 2024-09-09 00:00:00 7
## 2 Broward-Miami 2024-09-10 00:00:00 11
## 3 Broward-Miami 2024-09-11 00:00:00 9
## 4 Broward-Miami 2024-09-12 00:00:00 12
## 5 Tortugas--Dry Tortugas NP 2024-08-06 00:00:00 9
## 6 Tortugas--Dry Tortugas NP 2024-08-07 00:00:00 10
## 7 Tortugas--Dry Tortugas NP 2024-08-08 00:00:00 11
## 8 Tortugas--Tortugas Bank 2024-08-09 00:00:00 16
## 9 Tortugas--Tortugas Bank 2024-08-10 00:00:00 3
## 10 Tortugas--Tortugas Bank 2024-08-11 00:00:00 2
## 11 Upper Keys 2024-08-24 00:00:00 2
## 12 Upper Keys 2024-08-25 00:00:00 2
## 13 Upper Keys 2024-08-26 00:00:00 5
## 14 Upper Keys 2024-08-27 00:00:00 3
## 15 Upper Keys 2024-08-28 00:00:00 2
## 16 Upper Keys 2024-08-29 00:00:00 3
## 17 Upper Keys 2024-08-30 00:00:00 6
## 18 Upper Keys 2024-08-31 00:00:00 4
## 19 Upper Keys 2024-09-01 00:00:00 5
# Most sites were included in main DRM database for 2024 -- Import these
adultst1t2 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect1and2_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
select(site, transect_num, species, width, height)
adultst3t4 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect3and4_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
select(site, transect_num, species, width, height)
# 9 of our PEV sites were removed from DRM database to avoid oversaturating the ares -- Import these separately
removedt1t2 <- readxl::read_xlsx(
"data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T1-T2") %>%
janitor::clean_names() %>%
select(site, transect_num, species, width, height)
removedt3t4 <- readxl::read_xlsx(
"data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T3-T4") %>%
janitor::clean_names() %>%
select(site, transect_num, species, width, height)
# Combine all adult coral data for Shedd DRM surveys at PEV
adults0 <- bind_rows(adultst1t2, adultst3t4, removedt1t2, removedt3t4)
# Convert adult data to long format
adults_long <- adults0 %>%
mutate(max_width_cm = pmax(width, height, na.rm = TRUE)) %>%
mutate(max_width_cm = as.character(max_width_cm)) %>%
mutate(site = str_remove(site, "^AA")) %>%
select(site, transect_num, taxon = species, max_width_cm) %>%
drop_na(taxon) # DROPS when taxon is blank, this is when no corals >4cm were observed
# Import juvenile counts from main DRM dataset
juv <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_JuvenileCoralData_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium")
# Import juvenile counts from sites that were removed from main DRM dataset
removed_juv <- readxl::read_xlsx("data/2024_shedd_drm/Shedd_removed_sites_Juveniles_2024.xlsx") %>%
janitor::clean_names() %>%
# Missing values in count data should be zero counts (unique to this datasheet from FWC)
mutate(across(ends_with("_ct"), ~replace_na(., 0)))
# Combine juvenile data
juv0 <- bind_rows(juv, removed_juv) %>%
mutate(site = str_remove(site, "^AA")) %>%
select(site, transect_num, ends_with("ct")) %>%
rename(MCAV = montastraea_ct, MUSS = mussinae_ct, FAVI = faviinae_ct, MEAN = meandrinidae_ct)
# Convert juvenile data to long format
juv_long <- juv0 %>%
pivot_longer(c(MUSS, FAVI, MEAN, MCAV), names_to = "taxon", values_to = "n") %>%
mutate(max_width_cm = "<4") %>%
uncount(n)
# Other juvenile taxa counts from Transects 1 and 2 (DRM 'bonus data')
t1t2bonus <- read_csv("data/2024_shedd_drm/T1_T2_bonus_data.csv") %>%
janitor::clean_names() %>%
mutate(site = replace_na(site, "NA")) %>% # Because one site is called "NA"
mutate(transect_num = parse_number(transect))
t1t2juv <- t1t2bonus %>%
select(site, transect_num, starts_with("small")) %>%
rename_with(~ toupper(gsub("^small_", "", .x)), starts_with("small_"))
t1t2juv_long <- t1t2juv %>%
pivot_longer(3:10, names_to = "taxon", values_to = "n") %>%
mutate(max_width_cm = "<4") %>%
uncount(n)
# Replace site names in t1t2 bonus data with the correct DRM site ID
penipsites <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
janitor::clean_names()
t1t2juv_long_updated <- t1t2juv_long %>%
left_join(penipsites %>% select(site, drm_site_id), by = "site") %>%
mutate(site = as.character(drm_site_id)) %>%
select(-drm_site_id)
# Combine all data
drm24_long <- bind_rows(adults_long, juv_long, t1t2juv_long_updated) %>%
mutate(team = "Shedd Aquarium")
# Check species names
sort(unique(drm24_long$taxon))
## [1] "AAGA" "ACER" "AFRA" "CNAT" "DLAB" "DSTO" "EFAS" "FAVI" "FFRA" "MALI"
## [11] "MAUR" "MCAV" "MDEC" "MEAN" "MMEA" "MUSS" "OANN" "OFAV" "OFRA" "PAST"
## [21] "PCLI" "PFUR" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SRAD" "SSID"
write_csv(drm24_long, file = "data/processed/drm_2024_long.csv")
# COUNT based on rules
# ✅ Updated Rules Summary for counting from DRM/Shedd data:
# Juvenile taxa (searched for in <4cm size class only):
# "MEAN", "MUSS", "FAVI"
# → these should only ever appear in <4cm, never >4cm, and should not be zero-filled for adults.
# Other juvenile-capable taxa:
# "MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR"
# → these can be counted in both >4cm and <4cm, but only in <4cm if juveniles were searched on that transect and team.
# Transect-based search rules still apply:
# Transects 1 & 2: all adult taxa always searched. Juvenile search depends on team:
# "Shedd Aquarium" → all juvenile taxa above searched
# others → only MEAN, MUSS, FAVI, MCAV
# Transects 3 & 4:
# only subset of adult taxa searched (adult_taxa_t3t4)
# only juveniles: MEAN, MUSS, FAVI, MCAV
# Step 1: Define size classes
drm24_classed <- drm24_long %>%
mutate(class = case_when(as.numeric(max_width_cm) >= 4 ~ ">4cm",
max_width_cm == "<4" ~ "<4cm"))
# Step 2: Define species sets
all_taxa <- unique(drm24_classed$taxon)
adult_taxa_t3t4 <- c("CNAT", "DSTO", "DLAB", "MMEA", "MANG", "MALI",
"MFER", "MLAM", "PCLI", "PSTR")
juv_only_taxa <- c("MEAN", "MUSS", "FAVI")
juv_both_taxa <- c("MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR")
all_juv_taxa <- c(juv_only_taxa, juv_both_taxa)
# Step 3: Build search grid per site × transect × team
search_grid <- drm24_classed %>%
distinct(site, transect_num, team) %>% # if multiple teams in data, remove value for team
mutate(
searched_taxa_class = pmap(list(transect_num, team), function(transect, team) {
# Helper: define juv taxa allowed for this transect/team
juv_taxa <- if (transect %in% c(1, 2)) {
if (team == "Shedd Aquarium") { # Shedd searched for other juv taxa on T1 and T2, other DRM survey teams did not
all_juv_taxa
} else {
c(juv_only_taxa, "MCAV")
}
} else {
c(juv_only_taxa, "MCAV")
}
# Adults always searched in 1 & 2, subset in 3 & 4
adult_taxa <- if (transect %in% c(1, 2)) {
setdiff(all_taxa, juv_only_taxa) # exclude juv-only taxa
} else {
adult_taxa_t3t4
}
# Build grid
bind_rows(
expand_grid(taxon = adult_taxa, class = ">4cm"),
expand_grid(taxon = juv_taxa, class = "<4cm")
)
})
) %>%
unnest(searched_taxa_class)
# Step 4: Count observations
counts <- drm24_classed %>%
group_by(site, transect_num, team, taxon, class) %>%
summarize(n = n(), .groups = "drop")
# Step 5: Join with grid and fill in zeros where appropriate
final_counts <- search_grid %>%
left_join(counts, by = c("site", "transect_num", "team", "taxon", "class")) %>%
mutate(n = replace_na(n, 0))
write_csv(final_counts, file = "data/processed/drm_2024_counts.csv")
# AGGREGATE COUNT DATA
# Aggregate taxa
drm24_counts_ag <- final_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(drm24_counts_ag, file = "data/processed/drm_2024_counts_ag.csv")
# --- STEP 1: Load KML Polygons ---
polygons <- st_read("data/Habitat classifications.kml") # update path as needed
## Reading layer `Habitat classifications' from data source
## `/Users/rosscunning/Projects/PENIP/data/Habitat classifications.kml'
## using driver `KML'
## Simple feature collection with 190 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XYZ
## Bounding box: xmin: -80.11618 ymin: 25.97477 xmax: -80.06143 ymax: 26.25943
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# --- STEP 2: Extract Attributes from HTML Description ---
extract_attrs <- function(desc) {
if (is.na(desc) || desc == "") {
return(tibble(Habitat = NA, Type = NA, Modifier = NA, Region = NA, Type2 = NA))
}
html <- read_html(desc)
rows <- xml_find_all(html, "//table//table//tr")
keys <- rows %>% xml_find_all(".//td[1]") %>% xml_text(trim = TRUE)
vals <- rows %>% xml_find_all(".//td[2]") %>% xml_text(trim = TRUE)
n <- min(length(keys), length(vals))
named_vals <- set_names(vals[1:n], keys[1:n])
tibble(
Habitat = named_vals[["Habitat"]],
Type = named_vals[["Type"]],
Modifier = named_vals[["Modifier"]],
Region = named_vals[["Region"]],
Type2 = named_vals[["Type2"]]
)
}
# Apply function and combine with spatial geometries
attrs <- map_dfr(polygons$Description, extract_attrs)
polygons_clean <- bind_cols(polygons %>% select(-Description), attrs)
# --- STEP 3: Prepare Site Coordinate Data ---
# Get all site coordinates, and assign north and south
# Combine site metadata
allsitemd <- bind_rows(.id = "dataset",
nsu11_esa = nsu11_esa_sitemd,
dca17_esa = dca17_esa_sitemd,
dca17 = dca17_sitemd,
tt21 = tt21_sitemd,
tt23 = tt23_sitemd,
drm24 = drm24_sitemd
) %>%
mutate(dir = if_else(latitude > 26.093570, "N", "S"))
points <- st_as_sf(allsitemd, coords = c("longitude", "latitude"), crs = 4326)
# --- STEP 4: Validate Geometry and Match CRS ---
polygons_clean <- polygons_clean %>%
st_zm(drop = TRUE, what = "ZM") %>%
st_make_valid() %>%
st_transform(st_crs(points))
sf_use_s2(FALSE) # prevent s2 geometry issues
# --- STEP 5: Spatial Join ---
joined <- st_join(points, polygons_clean, join = st_within)
joined_df <- joined %>%
mutate(longitude = st_coordinates(.)[,1],
latitude = st_coordinates(.)[,2]) %>%
st_drop_geometry()
allsitemd <- joined_df
# 'Sand' overlaps some of the other polygons instead of just surrounding them...
# If a point is classified as Sand AND something else, remove Sand...
multiclass <- allsitemd %>%
group_by(site) %>%
filter(n() > 1)
allsitemd_clean <- allsitemd %>%
group_by(site) %>%
filter(!(Type == "Sand" & n() > 1)) %>%
ungroup()
# Add factor if survey was ESA coral species only
allsitemd_clean <- allsitemd_clean %>%
mutate(ESAonly = if_else(dataset %in% c("nsu11_esa", "dca17_esa"), "ESA only", "All corals"))
# Visualize habitat classifications
polyplot <- polygons_clean %>%
#filter(Type != "Sand") %>%
ggplot() +
geom_sf(aes(fill = Type), color = "black", size = 0.2, alpha = 0.6) +
scale_fill_brewer(palette = "Set3", na.value = "gray80") +
facet_wrap(~ESAonly) +
theme_minimal() +
labs(title = "Habitat Polygons Colored by Type", fill = "Type") +
theme(legend.position = "right") +
xlim(-80.11, -80.079) +
ylim(26.0675, 26.11)
# Plot all surveyed sites
polyplot +
geom_point(data = allsitemd_clean,
aes(x = longitude, y = latitude, shape = dataset),
inherit.aes = FALSE, alpha = 0.6) +
scale_shape_manual(values = c(2, 3, 4, 5, 6, 7)) +
labs(title = "All sites - all surveys")
# Combine all aggregated count data
df <- bind_rows(.id = "dataset",
nsu11_esa = nsu11_esa_counts_ag,
dca17_esa = dca17_esa_counts_ag,
dca17 = dca17_counts_ag,
tt21 = tt21_counts_ag,
tt23 = tt23_counts_ag,
drm24 = select(drm24_counts_ag, -team)
) %>%
mutate(transect_num = if_else(is.na(transect_num), 1, transect_num))
# Add transect area information
## Site-specific areas for tt21 (since there was sand in transects)
tt21_areas <- tt21_m_nosand %>%
mutate(dataset = "tt21") %>%
distinct(dataset, site, transect_area_m2 = area_m2)
df <- df %>% left_join(tt21_areas)
## Other datasets had fixed transect areas
df <- df %>%
mutate(transect_area_m2 = case_when(
# Count-specific areas for nsu11 (since they did tier 1 surveys and tier 2 only if n > 5 for a taxon)
dataset == "nsu11_esa" & n >= 5 ~ 600,
dataset == "nsu11_esa" & n < 5 ~ 3600,
dataset == "dca17_esa" ~ 784,
dataset == "dca17" ~ 30, # DCA belt transects were 30m
dataset == "tt23" ~ 20, # TT23 belt transects were 20m
dataset == "drm24" ~ 10, # DRM belt transects were 10m)
TRUE ~ transect_area_m2))
# Remove surveys with transect_area_m2 == 0 (a couple of TT21 surveys were all sand)
df <- df %>% filter(transect_area_m2 > 0)
# Select sites in Nearshore Ridge Complex, Inner Reef, and Middle Reef, Outer Reef and Aggregated Patch Reef
selected <- allsitemd_clean %>%
filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
"Artificial", "Outer Reef", "Aggregated Patch Reef"))
# Plot all selected sites
polyplot +
geom_point(data = selected,
aes(x = longitude, y = latitude, shape = dataset),
inherit.aes = FALSE, alpha = 0.6) +
scale_shape_manual(values = c(2, 3, 4, 5, 6, 7)) +
facet_wrap(~ESAonly) +
labs(title = "Selected sites - all surveys")
# Filter dataset to just selected sites
dff <- df %>% inner_join(selected)
First, look at total coral density between the nearer vs. farther sites
drm24_NRCS <- drm24_counts_ag %>%
left_join(allsitemd_clean) %>%
filter(Type == "Nearshore Ridge Complex", dir == "S")
drm24_NRCS <- drm24_NRCS %>%
mutate(grp = if_else(latitude > 26.08, "near", "far"))
# Fit a Negative Binomial GLM
mod_nb <- MASS::glm.nb(n ~ grp, data = drm24_NRCS)
# Generate new data only for existing taxon-size class combinations
newdata_1 <- drm24_NRCS %>%
distinct(grp)
# Get predicted values & standard errors (log scale)
preds_nb <- predict(mod_nb, newdata_1, type = "link", se.fit = TRUE)
# Compute total coral density
results_nb <- newdata_1 %>%
mutate(
fit = exp(preds_nb$fit), # Convert fitted values to response scale
fit_se = exp(preds_nb$fit) * preds_nb$se.fit, # Convert SE using the Delta Method
fit_var = (fit * preds_nb$se.fit)^2, # Variance propagation
fit_lower = exp(preds_nb$fit - 1.96 * preds_nb$se.fit), # Lower CI
fit_upper = exp(preds_nb$fit + 1.96 * preds_nb$se.fit) # Upper CI
)
# Compute total coral density + confidence intervals
total_ci_nb <- results_nb %>%
group_by(grp) %>%
summarize(
total_density = sum(fit),
total_se = sqrt(sum(fit_var)),
lower_95CI = exp(log(total_density) - 1.96 * (total_se / total_density)),
upper_95CI = exp(log(total_density) + 1.96 * (total_se / total_density))
)
knitr::kable(total_ci_nb)
| grp | total_density | total_se | lower_95CI | upper_95CI |
|---|---|---|---|---|
| far | 1.014006 | 0.1961258 | 0.6940657 | 1.4814266 |
| near | 0.719611 | 0.1074599 | 0.5370134 | 0.9642963 |
Next, look at community composition between the nearer vs. farther sites
# 1. Create a wide community matrix: rows = site x grp, columns = taxa
comm_matrix <- drm24_NRCS %>%
group_by(site, grp, taxon) %>%
summarize(total_n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = taxon, values_from = total_n, values_fill = 0)
# 2. Extract community matrix and metadata
comm_data <- comm_matrix %>% select(-site, -grp)
site_info <- comm_matrix %>% select(site, grp)
# 3. Run NMDS (k = 2 dimensions is standard)
set.seed(123) # for reproducibility
nmds <- metaMDS(comm_data, distance = "bray", k = 2, trymax = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.165012
## Run 1 stress 0.2193914
## Run 2 stress 0.2040358
## Run 3 stress 0.2193195
## Run 4 stress 0.1682346
## Run 5 stress 0.165012
## ... New best solution
## ... Procrustes: rmse 0.000116517 max resid 0.0002972814
## ... Similar to previous best
## Run 6 stress 0.2040358
## Run 7 stress 0.1682346
## Run 8 stress 0.165012
## ... Procrustes: rmse 1.578068e-05 max resid 4.150584e-05
## ... Similar to previous best
## Run 9 stress 0.165012
## ... Procrustes: rmse 3.239688e-05 max resid 8.954374e-05
## ... Similar to previous best
## Run 10 stress 0.165012
## ... Procrustes: rmse 0.000123681 max resid 0.0003064821
## ... Similar to previous best
## Run 11 stress 0.2361485
## Run 12 stress 0.2168096
## Run 13 stress 0.165012
## ... Procrustes: rmse 0.0001056002 max resid 0.0002717468
## ... Similar to previous best
## Run 14 stress 0.1682346
## Run 15 stress 0.165012
## ... Procrustes: rmse 4.909749e-05 max resid 0.0001136266
## ... Similar to previous best
## Run 16 stress 0.165012
## ... Procrustes: rmse 4.240482e-05 max resid 9.704908e-05
## ... Similar to previous best
## Run 17 stress 0.165012
## ... Procrustes: rmse 5.282838e-05 max resid 0.0001262477
## ... Similar to previous best
## Run 18 stress 0.165012
## ... New best solution
## ... Procrustes: rmse 1.071167e-05 max resid 3.009965e-05
## ... Similar to previous best
## Run 19 stress 0.2193195
## Run 20 stress 0.1682346
## *** Best solution repeated 1 times
# 4. Prepare data for plotting
scores_df <- scores(nmds)$sites %>%
bind_cols(site_info)
# 5. Plot NMDS with ggplot2
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = grp)) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = "NMDS of Coral Community Structure\nby Dist from Channel in NRC South",
color = "Dist from channel")
Density and community composition are not different in the NRC S sites that are nearer vs. farther from the channel. So, no need to exclude the farther sites.
‘Artificial’ is only present in DCA and minorly in TT – but absent
from Shedd.
It is in close spatial proximity to Nearshore Ridge Complex – can these
be combined?
Test for differences in coral density in DCA survey between habitat types
# Subset DCA data
dcadf <- dca17_counts_ag %>%
left_join(allsitemd_clean) %>%
filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef", "Artificial", "Outer Reef", "Aggregated Patch Reef"))
# Fit a Negative Binomial GLM
mod_nb <- MASS::glm.nb(n ~ Type, data = dcadf)
# Generate new data only for existing taxon-size class combinations
newdata_1 <- dcadf %>%
distinct(Type)
# Get predicted values & standard errors (log scale)
preds_nb <- predict(mod_nb, newdata_1, type = "link", se.fit = TRUE)
# Compute both total coral density & taxon-size class-specific densities in one step
results_nb <- newdata_1 %>%
mutate(
fit = exp(preds_nb$fit), # Convert fitted values to response scale
fit_se = exp(preds_nb$fit) * preds_nb$se.fit, # Convert SE using the Delta Method
fit_var = (fit * preds_nb$se.fit)^2, # Variance propagation
fit_lower = exp(preds_nb$fit - 1.96 * preds_nb$se.fit), # Lower CI
fit_upper = exp(preds_nb$fit + 1.96 * preds_nb$se.fit) # Upper CI
)
# Compute total coral density + confidence intervals
total_ci_nb <- results_nb %>%
group_by(Type) %>%
summarize(
total_density = sum(fit),
total_se = sqrt(sum(fit_var)),
lower_95CI = exp(log(total_density) - 1.96 * (total_se / total_density)),
upper_95CI = exp(log(total_density) + 1.96 * (total_se / total_density))
)
knitr::kable(total_ci_nb)
| Type | total_density | total_se | lower_95CI | upper_95CI |
|---|---|---|---|---|
| Aggregated Patch Reef | 1.5774194 | 0.1823963 | 1.2575386 | 1.978668 |
| Artificial | 1.7580645 | 0.2024839 | 1.4028022 | 2.203298 |
| Inner Reef | 1.3680352 | 0.1517039 | 1.1007895 | 1.700162 |
| Middle Reef | 0.8573201 | 0.0896931 | 0.6983746 | 1.052441 |
| Nearshore Ridge Complex | 1.9869432 | 0.1572876 | 1.7013849 | 2.320429 |
| Outer Reef | 1.5945409 | 0.1022314 | 1.4062459 | 1.808049 |
Highly overlapping confidence intervals for Artifical and Nearshore Ridge Complex, so no difference in coral density.
Test for differences in community composition in DCA survey
library(vegan)
# 1. Create a wide community matrix: rows = site x Type, columns = taxa
comm_matrix <- dcadf %>%
group_by(site, Type, taxon) %>%
summarize(total_n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = taxon, values_from = total_n, values_fill = 0)
# 2. Extract community matrix and metadata
comm_data <- comm_matrix %>% select(-site, -Type)
site_info <- comm_matrix %>% select(site, Type)
# 3. Run NMDS (k = 2 dimensions is standard)
set.seed(123) # for reproducibility
nmds <- metaMDS(comm_data, distance = "bray", k = 2, trymax = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2296046
## Run 1 stress 0.2328198
## Run 2 stress 0.236287
## Run 3 stress 0.2306855
## Run 4 stress 0.2321711
## Run 5 stress 0.2329833
## Run 6 stress 0.2345872
## Run 7 stress 0.2404346
## Run 8 stress 0.2343496
## Run 9 stress 0.239751
## Run 10 stress 0.2372008
## Run 11 stress 0.2311242
## Run 12 stress 0.2320106
## Run 13 stress 0.2299447
## ... Procrustes: rmse 0.009342091 max resid 0.1237386
## Run 14 stress 0.2358043
## Run 15 stress 0.2384259
## Run 16 stress 0.2314443
## Run 17 stress 0.2368204
## Run 18 stress 0.2418342
## Run 19 stress 0.2322785
## Run 20 stress 0.2375749
## Run 21 stress 0.231392
## Run 22 stress 0.2389861
## Run 23 stress 0.2352179
## Run 24 stress 0.2335193
## Run 25 stress 0.230046
## ... Procrustes: rmse 0.02784978 max resid 0.2351796
## Run 26 stress 0.2342695
## Run 27 stress 0.234528
## Run 28 stress 0.2353441
## Run 29 stress 0.2339306
## Run 30 stress 0.2388938
## Run 31 stress 0.2345629
## Run 32 stress 0.2314763
## Run 33 stress 0.23695
## Run 34 stress 0.2317871
## Run 35 stress 0.2356343
## Run 36 stress 0.2353547
## Run 37 stress 0.2347807
## Run 38 stress 0.2415145
## Run 39 stress 0.2327134
## Run 40 stress 0.2337619
## Run 41 stress 0.2366197
## Run 42 stress 0.231129
## Run 43 stress 0.2334511
## Run 44 stress 0.230302
## Run 45 stress 0.2307399
## Run 46 stress 0.2374532
## Run 47 stress 0.2305006
## Run 48 stress 0.2321733
## Run 49 stress 0.2307093
## Run 50 stress 0.2296049
## ... Procrustes: rmse 9.00414e-05 max resid 0.0008408963
## ... Similar to previous best
## *** Best solution repeated 1 times
# 4. Prepare data for plotting
scores_df <- scores(nmds)$sites %>%
bind_cols(site_info)
# 5. Plot NMDS with ggplot2
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Type)) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = "NMDS of Coral Community Structure by Habitat Type",
color = "Habitat Type")
High degree of similarity between Artificial and Nearshore Ridge Complex coral communities.
Combine ‘Artificial’ with ‘Nearshore Ridge Complex’
# Based on these results, combine "Artificial" with "Nearshore Ridge Complex"
dff[dff$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"
Remove lowest abundance coral taxa
These will be problematic for statistical models
# Drop taxa with very few observations
sppcounts <- dff %>%
group_by(taxon) %>%
summarize(total = sum(n), .groups = "drop") %>%
arrange(total)
sppcounts
## # A tibble: 22 × 2
## taxon total
## <chr> <dbl>
## 1 MANG 0
## 2 FFRA 1
## 3 HCUC 1
## 4 PAME 1
## 5 SCOL 2
## 6 OCUL 3
## 7 EFAS 35
## 8 MUSS 37
## 9 MYCE 56
## 10 MMEA 95
## # ℹ 12 more rows
dff <- dff %>%
filter(taxon %in% filter(sppcounts, total >= 5)$taxon) %>%
mutate(Type = factor(Type, levels = type_levels))
Taxa with less than 5 total observations were filtered out, which included: HCUC, MANG, PAME, FFRA, OCUL, SCOL
(nsu11_esa_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "nsu11_esa"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2011 — NSU ESA Survey"))
(dca17_esa_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "dca17_esa"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2017 — DCA ESA Survey"))
(dca_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "dca17"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2017 — DCA RECON Survey"))
(tt21_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "tt21"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2021 — Tetra Tech Survey"))
(tt23_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "tt23"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2023 — Tetra Tech Survey"))
(drm24_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "drm24"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2024 — Shedd Survey"))
Predictors: dataset, habitat Type, direction from channel, taxon
# First, analyze surveys that included ALL corals (exclude ESA ONLY surveys)
dff_alltaxa <- dff %>%
filter(!dataset %in% c("nsu11_esa", "dca17_esa"))
# Get total counts for each taxon at each site
dfftaxon <- dff_alltaxa %>%
group_by(dataset, Type, dir, site, transect_num, transect_area_m2, taxon) %>%
summarize(n = sum(n)) %>%
ungroup()
# Remove any taxa not observed in a given dataset / habitat Type ?????
dfftaxon_trimmed <- dfftaxon %>%
group_by(dataset, Type, dir, taxon) %>%
filter(any(n > 0)) %>%
ungroup()
# SUPER MODEL
# mod_nb <- brm(
# bf(n ~ dataset * Type * dir * taxon + offset(log(transect_area_m2))),
# family = negbinomial(),
# data = dfftaxon_trimmed,
# prior = c(prior(normal(0, 2), class = "b"), # Weak prior on coefficients
# prior(normal(0, 5), class = "Intercept"), # Weak prior on intercept
# prior(exponential(1), class = "shape")), # Reasonable prior for NB dispersion
# chains = 4,
# cores = 4,
# threads = threading(5),
# iter = 1000, warmup = 500,
# thin = 2,
# control = list(adapt_delta = 0.9, max_treedepth = 12),
# backend = "cmdstanr"
# )
# saveRDS(mod_nb, file = "data/processed/mod_nb.rds")
mod_nb <- readRDS("data/processed/mod_nb.rds")
# 1. Create newdata grid (1 m² for standardization)
newdata <- dfftaxon_trimmed %>%
distinct(dataset, Type, dir, taxon) %>%
mutate(transect_area_m2 = 1)
# 2. Get fitted values manually by computing summary statistics across all draws
posterior_draws <- add_epred_draws(mod_nb, newdata = newdata) %>%
mutate(Type = factor(Type, levels = type_levels),
dataset = factor(dataset, levels = dataset_levels))
# Compute and plot total coral abundance by dataset, habitat Type, and direction from channel
total_abund <- posterior_draws %>%
group_by(.draw, dataset, Type, dir) %>%
summarize(total_epred = sum(.epred), .groups = "drop") %>%
group_by(dataset, Type, dir) %>%
summarize(
fit_mean = mean(total_epred),
fit_sd = sd(total_epred),
fit_lower = quantile(total_epred, 0.025),
fit_upper = quantile(total_epred, 0.975),
.groups = "drop"
)
ggplot(total_abund, aes(x = dataset, y = fit_mean, color = dir)) +
facet_grid(~ Type, labeller = as_labeller(type_labels)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
position = position_dodge(width = 0.5)) +
scale_x_discrete(labels = dataset_labels) +
theme(axis.text.x = element_text(angle = 90))
# Any N-S differences in total coral abundance?
total_NS_diff <- posterior_draws %>%
ungroup() %>%
group_by(.draw, dataset, Type, dir) %>%
summarize(total_epred = sum(.epred), .groups = "drop") %>%
pivot_wider(names_from = dir, values_from = total_epred) %>%
filter(!is.na(N), !is.na(S)) %>% # make sure both are present
mutate(diff = N - S) # or S - N depending on desired contrast
total_NS_summ <- total_NS_diff %>%
group_by(dataset, Type) %>%
summarize(
mean_N = mean(N),
mean_S = mean(S),
mean_diff = mean(diff),
lower_95 = quantile(diff, 0.025),
upper_95 = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)), # <-- two-sided p-value
.groups = "drop"
)
total_NS_summ %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
knitr::kable()
| dataset | Type | mean_N | mean_S | mean_diff | lower_95 | upper_95 | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|---|
| tt21 | Nearshore Ridge Complex | 0.1638554 | 2.733225 | -2.5693694 | -4.2110532 | -1.571681 | 0 | 0 |
| tt23 | Nearshore Ridge Complex | 1.8897405 | 1.165636 | 0.7241043 | 0.2886346 | 1.177246 | 0 | 0 |
| drm24 | Nearshore Ridge Complex | 3.8277422 | 2.372224 | 1.4555180 | 0.4503713 | 2.609471 | 0 | 0 |
# 1. Sum predicted values across taxa (total coral density per draw × dataset × Type × dir)
draws_total <- posterior_draws %>%
mutate(dataset = as.character(dataset)) %>%
group_by(.draw, dataset, Type, dir) %>%
summarize(total_epred = sum(.epred), .groups = "drop")
# 2. Self-join to compare dataset pairs within each draw × Type × dir
pairwise_total <- draws_total %>%
rename(dataset1 = dataset, epred1 = total_epred) %>%
inner_join(
draws_total %>% rename(dataset2 = dataset, epred2 = total_epred),
by = c(".draw", "Type", "dir")
) %>%
filter(dataset1 < dataset2) %>%
mutate(diff = epred1 - epred2)
# 3. Summarize posterior contrasts
summary_total_contrasts <- pairwise_total %>%
group_by(Type, dir, dataset1, dataset2) %>%
summarize(
mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop"
)
# 4. Filter significant differences
summary_total_contrasts %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
arrange(Type, dir) %>%
knitr::kable()
| Type | dir | dataset1 | dataset2 | mean_diff | lower | upper | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|---|
| Nearshore Ridge Complex | N | dca17 | drm24 | -2.0722002 | -3.2429805 | -1.1342962 | 0 | 0 |
| Nearshore Ridge Complex | N | drm24 | tt21 | 3.6638868 | 2.7103310 | 4.7773470 | 0 | 0 |
| Nearshore Ridge Complex | N | drm24 | tt23 | 1.9380017 | 0.9830099 | 3.0853027 | 0 | 0 |
| Nearshore Ridge Complex | S | dca17 | tt23 | 0.9823635 | 0.5373184 | 1.4666612 | 0 | 0 |
| Nearshore Ridge Complex | S | drm24 | tt23 | 1.2065880 | 0.7326402 | 1.7314905 | 0 | 0 |
| Nearshore Ridge Complex | S | tt21 | tt23 | 1.5675886 | 0.5939458 | 3.2024142 | 0 | 0 |
| Middle Reef | N | dca17 | drm24 | -1.8509530 | -2.9474803 | -1.0637058 | 0 | 0 |
| Middle Reef | N | dca17 | tt23 | -0.6249693 | -1.0625599 | -0.2700722 | 0 | 0 |
| Outer Reef | N | dca17 | tt21 | -1.7565019 | -3.0106186 | -0.8402466 | 0 | 0 |
| Outer Reef | N | dca17 | tt23 | -0.7840636 | -1.2582951 | -0.3406109 | 0 | 0 |
| Outer Reef | S | dca17 | tt21 | -1.5817993 | -3.0670055 | -0.5968518 | 0 | 0 |
| Outer Reef | S | dca17 | tt23 | -1.1728175 | -1.9566649 | -0.5124615 | 0 | 0 |
fitted_taxon <- posterior_draws %>%
group_by(dataset, Type, dir, taxon) %>%
summarize(fit_mean = mean(.epred), # Posterior mean (expected value)
fit_sd = sd(.epred), # Posterior standard deviation
fit_lower = quantile(.epred, 0.025), # 2.5% quantile (lower CI)
fit_upper = quantile(.epred, 0.975)) # 97.5% quantile (upper CI)
# Plot
ggplot(fitted_taxon, aes(y = fit_mean, x = Type, color = dir, shape = dataset,
group = interaction(dataset, dir))) +
facet_wrap(taxon ~ ., scales = "free_x") +
geom_point(position = position_dodge(width = 0.5)) +
geom_line(position = position_dodge(width = 0.5), alpha = 0.5) +
geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
position = position_dodge(width = 0.5), lwd = 0.25) +
scale_y_log10() +
scale_x_discrete(labels = type_labels) +
labs(x = "Habitat Type", y = "Density (per m2)")
# Any N-S differences in taxon abundance within habitat Types/datasets?
#### Pivot wide so you can calculate N vs S difference per draw
taxon_NS_diff <- posterior_draws %>%
ungroup() %>%
select(.draw, dataset, Type, dir, taxon, .epred) %>%
pivot_wider(names_from = dir, values_from = .epred) %>%
filter(!is.na(N), !is.na(S)) %>% # ensure both directions exist for the draw
mutate(diff = S - N) # or N - S depending on interpretation
taxon_NS_summ <- taxon_NS_diff %>%
group_by(dataset, Type, taxon) %>%
summarize(
mean_diff = mean(diff),
lower_95 = quantile(diff, 0.025),
upper_95 = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop"
)
taxon_NS_summ %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
mutate(greater = if_else(mean_diff > 0, "N", "S")) %>%
group_by(dataset, Type, greater) %>%
summarize(taxa = paste(taxon, collapse = ",")) %>%
arrange(Type, greater) %>%
knitr::kable()
| dataset | Type | greater | taxa |
|---|---|---|---|
| dca17 | Nearshore Ridge Complex | N | MCAV,MEAN,SINT |
| dca17 | Nearshore Ridge Complex | S | ACER |
| drm24 | Nearshore Ridge Complex | S | SIDE |
| dca17 | Middle Reef | N | MADR |
# Ensure dataset is character so we can do < comparison
draws_clean <- posterior_draws %>%
mutate(dataset = as.character(dataset))
# One self-join to get all unique dataset pairs
pairwise_contrasts <- draws_clean %>%
rename(dataset1 = dataset, epred1 = .epred) %>%
inner_join(
draws_clean %>% rename(dataset2 = dataset, epred2 = .epred),
by = c(".draw", "taxon", "dir", "Type")
) %>%
filter(dataset1 < dataset2) %>% # Avoid duplicates and self-pairs
mutate(diff = epred1 - epred2)
# Step: summarize the posterior differences
summary_contrasts <- pairwise_contrasts %>%
group_by(taxon, dir, Type, dataset1, dataset2) %>%
summarize(mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop")
summary_contrasts %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
arrange(taxon, Type, dir) %>%
knitr::kable()
| taxon | dir | Type | dataset1 | dataset2 | mean_diff | lower | upper | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|---|---|
| AGAR | N | Outer Reef | dca17 | tt21 | -0.0743928 | -0.1732024 | -0.0174495 | 0 | 0 |
| DSTO | N | Nearshore Ridge Complex | dca17 | tt23 | -0.0272579 | -0.0518685 | -0.0105298 | 0 | 0 |
| FAVI | S | Nearshore Ridge Complex | dca17 | drm24 | -0.0319854 | -0.0613998 | -0.0092640 | 0 | 0 |
| FAVI | S | Nearshore Ridge Complex | drm24 | tt23 | 0.0467998 | 0.0259353 | 0.0727550 | 0 | 0 |
| MCAV | N | Nearshore Ridge Complex | dca17 | tt23 | -0.1262681 | -0.1895953 | -0.0736573 | 0 | 0 |
| MCAV | N | Nearshore Ridge Complex | drm24 | tt23 | -0.1332863 | -0.2016460 | -0.0776296 | 0 | 0 |
| MCAV | N | Inner Reef | dca17 | tt23 | -0.3724693 | -0.8338104 | -0.1163532 | 0 | 0 |
| MEAN | S | Nearshore Ridge Complex | dca17 | drm24 | 0.0419309 | 0.0255211 | 0.0615118 | 0 | 0 |
| MMEA | N | Aggregated Patch Reef | dca17 | tt21 | -0.2637638 | -0.8261646 | -0.0377758 | 0 | 0 |
| MUSS | S | Nearshore Ridge Complex | dca17 | drm24 | -0.0116444 | -0.0233064 | -0.0033990 | 0 | 0 |
| PORI | N | Nearshore Ridge Complex | dca17 | drm24 | -0.3514797 | -0.5692421 | -0.1976537 | 0 | 0 |
| PORI | N | Nearshore Ridge Complex | drm24 | tt23 | 0.4229911 | 0.2713027 | 0.6344347 | 0 | 0 |
| PORI | S | Nearshore Ridge Complex | dca17 | drm24 | -0.2860491 | -0.4493616 | -0.1651409 | 0 | 0 |
| PORI | S | Nearshore Ridge Complex | dca17 | tt23 | 0.1363431 | 0.0952311 | 0.1858206 | 0 | 0 |
| PORI | S | Nearshore Ridge Complex | drm24 | tt23 | 0.4223922 | 0.3044330 | 0.5684033 | 0 | 0 |
| PORI | S | Nearshore Ridge Complex | tt21 | tt23 | 0.3457599 | 0.1745594 | 0.6508403 | 0 | 0 |
| PORI | S | Inner Reef | dca17 | drm24 | -0.3065865 | -0.5300796 | -0.1307656 | 0 | 0 |
| PORI | S | Inner Reef | drm24 | tt23 | 0.4101004 | 0.2584220 | 0.6343684 | 0 | 0 |
| SIDE | N | Nearshore Ridge Complex | dca17 | drm24 | -1.6396539 | -2.7817252 | -0.7591785 | 0 | 0 |
| SIDE | N | Nearshore Ridge Complex | drm24 | tt23 | 1.7881154 | 0.8764428 | 2.9300437 | 0 | 0 |
| SIDE | S | Nearshore Ridge Complex | dca17 | tt23 | 0.9752119 | 0.5421562 | 1.4451685 | 0 | 0 |
| SIDE | S | Nearshore Ridge Complex | drm24 | tt23 | 0.9633752 | 0.5511315 | 1.4582500 | 0 | 0 |
| SIDE | S | Nearshore Ridge Complex | tt21 | tt23 | 1.4182959 | 0.4792489 | 3.0617184 | 0 | 0 |
| SIDE | N | Middle Reef | dca17 | drm24 | -1.3480286 | -2.4328561 | -0.6226663 | 0 | 0 |
| SIDE | N | Middle Reef | dca17 | tt23 | -0.4318349 | -0.8024504 | -0.1656625 | 0 | 0 |
| SIDE | N | Outer Reef | dca17 | tt21 | -0.8668175 | -1.9462672 | -0.2468464 | 0 | 0 |
| SIDE | N | Outer Reef | dca17 | tt23 | -0.6077766 | -1.0003850 | -0.3146779 | 0 | 0 |
| SIDE | S | Outer Reef | dca17 | tt23 | -0.8396635 | -1.4962463 | -0.3589375 | 0 | 0 |
| SINT | N | Nearshore Ridge Complex | dca17 | tt23 | -0.2313220 | -0.3458246 | -0.1442119 | 0 | 0 |
| SINT | N | Nearshore Ridge Complex | drm24 | tt23 | -0.1870873 | -0.3104628 | -0.0870124 | 0 | 0 |
| SINT | S | Nearshore Ridge Complex | dca17 | tt23 | -0.1655151 | -0.2818192 | -0.0745273 | 0 | 0 |
| SINT | S | Nearshore Ridge Complex | drm24 | tt23 | -0.1850827 | -0.3057516 | -0.0847900 | 0 | 0 |
| SOLE | S | Outer Reef | dca17 | tt21 | -0.0434264 | -0.1036845 | -0.0097645 | 0 | 0 |
| SOLE | S | Outer Reef | dca17 | tt23 | -0.0383666 | -0.0774636 | -0.0128291 | 0 | 0 |
COMBINE N AND S FOR THIS ANALYSIS BASED ON FEW DIFFS DETECTED EARLIER TO GAIN STATISTICAL POWER?
# Collapse across direction (North/South) by averaging per draw
draws_collapsed <- posterior_draws %>%
group_by(.draw, dataset, Type, taxon) %>%
summarize(epred = mean(.epred), .groups = "drop") %>%
mutate(dataset = as.character(dataset))
pairwise_taxon_diffs <- draws_collapsed %>%
rename(dataset1 = dataset, epred1 = epred) %>%
inner_join(
draws_collapsed %>% rename(dataset2 = dataset, epred2 = epred),
by = c(".draw", "Type", "taxon")
) %>%
filter(dataset1 < dataset2) %>%
mutate(diff = epred1 - epred2)
summary_taxon_diffs <- pairwise_taxon_diffs %>%
group_by(Type, taxon, dataset1, dataset2) %>%
summarize(
mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop"
)
summary_taxon_diffs %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
arrange(taxon, Type) %>%
knitr::kable()
| Type | taxon | dataset1 | dataset2 | mean_diff | lower | upper | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|---|
| Nearshore Ridge Complex | FAVI | drm24 | tt23 | 0.0395553 | 0.0212871 | 0.0608927 | 0 | 0 |
| Nearshore Ridge Complex | MCAV | drm24 | tt23 | -0.0914420 | -0.1375599 | -0.0507294 | 0 | 0 |
| Nearshore Ridge Complex | MEAN | dca17 | drm24 | 0.0272651 | 0.0152878 | 0.0403764 | 0 | 0 |
| Aggregated Patch Reef | MMEA | dca17 | tt21 | -0.2695677 | -0.8301873 | -0.0445987 | 0 | 0 |
| Nearshore Ridge Complex | MUSS | dca17 | drm24 | -0.0116444 | -0.0233064 | -0.0033990 | 0 | 0 |
| Nearshore Ridge Complex | PORI | dca17 | drm24 | -0.3187644 | -0.4531957 | -0.2140374 | 0 | 0 |
| Nearshore Ridge Complex | PORI | dca17 | tt21 | -0.2213819 | -0.5155477 | -0.0531799 | 0 | 0 |
| Nearshore Ridge Complex | PORI | dca17 | tt23 | 0.1039272 | 0.0702083 | 0.1419702 | 0 | 0 |
| Nearshore Ridge Complex | PORI | drm24 | tt23 | 0.4226916 | 0.3194540 | 0.5479840 | 0 | 0 |
| Nearshore Ridge Complex | PORI | tt21 | tt23 | 0.3253091 | 0.1559066 | 0.6240335 | 0 | 0 |
| Inner Reef | PORI | dca17 | drm24 | -0.2427631 | -0.3830925 | -0.1306986 | 0 | 0 |
| Inner Reef | PORI | drm24 | tt23 | 0.2662808 | 0.1413177 | 0.4118703 | 0 | 0 |
| Nearshore Ridge Complex | SIDE | dca17 | drm24 | -0.8139086 | -1.4232669 | -0.2785692 | 0 | 0 |
| Nearshore Ridge Complex | SIDE | dca17 | tt23 | 0.5618367 | 0.2406640 | 0.9210361 | 0 | 0 |
| Nearshore Ridge Complex | SIDE | drm24 | tt23 | 1.3757453 | 0.8594618 | 2.0078132 | 0 | 0 |
| Middle Reef | SIDE | dca17 | drm24 | -0.9500748 | -1.6427105 | -0.4757017 | 0 | 0 |
| Middle Reef | SIDE | dca17 | tt23 | -0.4270717 | -0.7093608 | -0.2001004 | 0 | 0 |
| Outer Reef | SIDE | dca17 | tt21 | -0.9714069 | -1.7866271 | -0.4237102 | 0 | 0 |
| Outer Reef | SIDE | dca17 | tt23 | -0.7237200 | -1.0865961 | -0.4315643 | 0 | 0 |
| Nearshore Ridge Complex | SINT | dca17 | tt23 | -0.1984185 | -0.2825473 | -0.1329929 | 0 | 0 |
| Nearshore Ridge Complex | SINT | drm24 | tt23 | -0.1860850 | -0.2704737 | -0.1145137 | 0 | 0 |
| Outer Reef | SOLE | dca17 | tt21 | -0.0433702 | -0.1021788 | -0.0100907 | 0 | 0 |
| Outer Reef | SOLE | dca17 | tt23 | -0.0271521 | -0.0485932 | -0.0109315 | 0 | 0 |
TEST IF SCTLD-SUSCEPTIBLE CORALS AGGREGATED SHOW SIG DIFFS ACROSS SURVEYS?
# susceptibility from Papke et al. 2024
group_definitions <- tibble(
taxon = unique(posterior_draws$taxon),
taxon_group = case_when(
taxon %in% c("FAVI", "MUSS", "MEAN", "DSTO", "EFAS", "MMEA") ~ "high_sus",
taxon %in% c("MCAV", "ORBI", "SIDE", "SINT", "SOLE") ~ "med_sus",
taxon %in% c("ACER", "PORI", "AGAR", "MYCE", "MADR") ~ "low_sus",
TRUE ~ NA_character_
)
)
draws_grouped <- posterior_draws %>%
left_join(group_definitions, by = "taxon") %>%
filter(!is.na(taxon_group)) # Remove taxa not assigned to a group
draws_group_sums <- draws_grouped %>%
group_by(.draw, dataset, Type, taxon_group) %>%
summarize(epred = sum(.epred), .groups = "drop") %>%
mutate(dataset = as.character(dataset))
pairwise_group_diffs <- draws_group_sums %>%
rename(dataset1 = dataset, epred1 = epred) %>%
inner_join(
draws_group_sums %>% rename(dataset2 = dataset, epred2 = epred),
by = c(".draw", "Type", "taxon_group")
) %>%
filter(dataset1 < dataset2) %>% # Avoid redundant pairs and self-comparisons
mutate(diff = epred1 - epred2)
summary_group_diffs <- pairwise_group_diffs %>%
group_by(Type, taxon_group, dataset1, dataset2) %>%
summarize(
mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop"
) %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni"))
summary_group_diffs %>%
filter(p_adj < 0.05) %>%
arrange(taxon_group, Type) %>%
knitr::kable()
| Type | taxon_group | dataset1 | dataset2 | mean_diff | lower | upper | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|---|
| Nearshore Ridge Complex | high_sus | dca17 | tt21 | 0.1054008 | 0.0545890 | 0.1498935 | 0 | 0 |
| Nearshore Ridge Complex | high_sus | drm24 | tt21 | 0.1227181 | 0.0585407 | 0.1858453 | 0 | 0 |
| Nearshore Ridge Complex | low_sus | dca17 | drm24 | -0.6805585 | -0.9433098 | -0.4684761 | 0 | 0 |
| Nearshore Ridge Complex | low_sus | dca17 | tt23 | 0.2444170 | 0.1720783 | 0.3188149 | 0 | 0 |
| Nearshore Ridge Complex | low_sus | drm24 | tt23 | 0.9249755 | 0.7166012 | 1.1843958 | 0 | 0 |
| Nearshore Ridge Complex | low_sus | tt21 | tt23 | 0.2753823 | 0.0998425 | 0.5816253 | 0 | 0 |
| Inner Reef | low_sus | dca17 | drm24 | -0.5178437 | -0.8064140 | -0.2904408 | 0 | 0 |
| Inner Reef | low_sus | drm24 | tt21 | 0.5998535 | 0.3359713 | 0.8830058 | 0 | 0 |
| Inner Reef | low_sus | drm24 | tt23 | 0.4779622 | 0.2065152 | 0.7669523 | 0 | 0 |
| Nearshore Ridge Complex | med_sus | drm24 | tt23 | 2.1835852 | 1.1136569 | 3.4289880 | 0 | 0 |
| Inner Reef | med_sus | drm24 | tt21 | 2.3424230 | 1.2352835 | 3.3684610 | 0 | 0 |
| Inner Reef | med_sus | tt21 | tt23 | -2.3039653 | -3.6991883 | -1.0764120 | 0 | 0 |
| Middle Reef | med_sus | dca17 | drm24 | -2.0964769 | -3.5233008 | -1.0520341 | 0 | 0 |
| Middle Reef | med_sus | dca17 | tt23 | -1.1896394 | -1.8432436 | -0.6049062 | 0 | 0 |
| Outer Reef | med_sus | dca17 | tt21 | -2.5711711 | -4.4000393 | -1.3290573 | 0 | 0 |
| Outer Reef | med_sus | dca17 | tt23 | -1.5083146 | -2.2471151 | -0.8077222 | 0 | 0 |
# Proportion of transect area per dataset × Type × dir
survey_weights <- dfftaxon %>%
group_by(dataset, Type, dir) %>%
summarize(area = sum(transect_area_m2), .groups = "drop") %>%
group_by(dataset) %>%
mutate(weight = area / sum(area)) %>%
select(dataset, Type, dir, weight)
posterior_weighted <- posterior_draws %>%
mutate(dataset = as.character(dataset)) %>%
left_join(survey_weights, by = c("dataset", "Type", "dir")) %>%
mutate(weighted_epred = .epred * weight)
posterior_totals <- posterior_weighted %>%
group_by(.draw, dataset, taxon) %>%
summarize(total_epred = sum(weighted_epred), .groups = "drop")
pairwise_overall <- posterior_totals %>%
rename(dataset1 = dataset, epred1 = total_epred) %>%
inner_join(
posterior_totals %>% rename(dataset2 = dataset, epred2 = total_epred),
by = c(".draw", "taxon")
) %>%
filter(dataset1 < dataset2) %>%
mutate(diff = epred1 - epred2)
summary_overall <- pairwise_overall %>%
group_by(taxon, dataset1, dataset2) %>%
summarize(mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
p_two_sided = 2 * min(mean(diff > 0), mean(diff < 0)),
.groups = "drop")
summary_overall %>%
mutate(p_adj = p.adjust(p_two_sided, method = "bonferroni")) %>%
filter(p_adj < 0.01) %>%
knitr::kable()
| taxon | dataset1 | dataset2 | mean_diff | lower | upper | p_two_sided | p_adj |
|---|---|---|---|---|---|---|---|
| ACER | dca17 | drm24 | -0.0133559 | -0.0237519 | -0.0054169 | 0 | 0 |
| ACER | drm24 | tt21 | 0.0193402 | 0.0107057 | 0.0297153 | 0 | 0 |
| AGAR | drm24 | tt21 | -0.0250150 | -0.0477264 | -0.0076373 | 0 | 0 |
| FAVI | dca17 | drm24 | -0.0222753 | -0.0353871 | -0.0110823 | 0 | 0 |
| FAVI | drm24 | tt23 | 0.0306226 | 0.0195208 | 0.0435807 | 0 | 0 |
| MADR | dca17 | tt21 | -0.0367612 | -0.0802725 | -0.0070603 | 0 | 0 |
| MADR | drm24 | tt21 | -0.0631096 | -0.1069007 | -0.0350379 | 0 | 0 |
| MADR | tt21 | tt23 | 0.0456567 | 0.0160680 | 0.0907910 | 0 | 0 |
| MEAN | dca17 | drm24 | 0.0216639 | 0.0145720 | 0.0288097 | 0 | 0 |
| MEAN | dca17 | tt23 | 0.0160771 | 0.0083237 | 0.0234092 | 0 | 0 |
| MUSS | drm24 | tt23 | 0.0065312 | 0.0023880 | 0.0117684 | 0 | 0 |
| MUSS | tt21 | tt23 | 0.0096960 | 0.0031804 | 0.0204790 | 0 | 0 |
| ORBI | dca17 | tt21 | -0.0105744 | -0.0231454 | -0.0033751 | 0 | 0 |
| ORBI | tt21 | tt23 | 0.0119922 | 0.0048028 | 0.0240391 | 0 | 0 |
| PORI | dca17 | drm24 | -0.1247687 | -0.2137173 | -0.0481533 | 0 | 0 |
| SIDE | dca17 | drm24 | -0.9663197 | -1.2684278 | -0.6910510 | 0 | 0 |
| SIDE | drm24 | tt23 | 0.7874485 | 0.4915615 | 1.1214898 | 0 | 0 |
| SINT | drm24 | tt21 | -0.2423520 | -0.4082447 | -0.1234907 | 0 | 0 |
| SINT | drm24 | tt23 | -0.1316070 | -0.1840258 | -0.0733950 | 0 | 0 |
impact_zones <- st_read("data/Impact_zones.kml") %>%
st_zm(drop = TRUE, what = "ZM") %>%
st_make_valid() %>%
st_transform(st_crs(polygons_clean))
## Reading layer `Impact_zones' from data source
## `/Users/rosscunning/Projects/PENIP/data/Impact_zones.kml' using driver `KML'
## Simple feature collection with 9 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY, XYZ
## Bounding box: xmin: -80.10538 ymin: 26.08289 xmax: -80.08298 ymax: 26.1069
## z_range: zmin: -21.40724 zmax: 6.930363e-14
## Geodetic CRS: WGS 84
impact_zones <- impact_zones %>%
rename(ImpactZone = Name) # or whatever column contains zone names
impact_zones_plot <- impact_zones %>%
mutate(linetype_group = "Impact Zone")
ggplot() +
# Habitat polygons
geom_sf(data = polygons_clean, aes(fill = Type), color = "black", size = 0.2, alpha = 0.6) +
# Impact zones with dummy linetype for legend
geom_sf(data = impact_zones_plot, aes(linetype = linetype_group),
fill = NA, color = "black", linewidth = 0.6, show.legend = TRUE) +
# Color scales
scale_fill_brewer(palette = "Set3", na.value = "gray80") +
scale_linetype_manual(name = "", values = c("Impact Zone" = "dashed")) +
# Theme and layout
theme_minimal() +
labs(title = "Habitat Polygons within Impact Zones", fill = "Type") +
theme(legend.position = "right") +
xlim(-80.11, -80.079) +
ylim(26.08, 26.11)
# Spatial intersection of habitat polygons with impact zones
habitat_in_zones <- st_intersection(polygons_clean, impact_zones)
# Use a projected CRS for accurate area (e.g., UTM Zone 17N for South Florida)
habitat_in_zones_proj <- habitat_in_zones %>%
st_transform(32617) %>%
mutate(area_m2 = st_area(geometry))
# Adjust column names depending on your Impact Zones KML
area_summary <- habitat_in_zones_proj %>%
st_drop_geometry() %>%
group_by(Type, ImpactZone) %>%
summarize(total_area_m2 = sum(as.numeric(area_m2)), .groups = "drop")
area_summary[area_summary$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"
area_summary <- area_summary %>%
group_by(Type, ImpactZone) %>%
summarize(total_area_m2 = sum(total_area_m2))
Right now this does NOT differentiate N and S of the channel… This is because no significant differences based on direction were detected within any habitat in any dataset (except TT21 NRC, but this only had 2 sites N of channel, and they were right on the side of the channel…)
BREAK DOWN TOTALS BY ADULT AND JUVENILE CORALS
area_summary <- area_summary %>%
mutate(ImpactZone = factor(ImpactZone,
levels = c("Channel", "Side Slopes", "Scenario 2, > 10 cm", "Scenario 2, 5.1-10 cm",
"Scenario 4, 1.1-5 cm", "Scenario 4, 0.51-1 cm", "Scenario 4, 0.1-0.5 cm")))
totals <- left_join(total_abund, area_summary, by = "Type") %>%
mutate(tot_estimate = fit_mean * total_area_m2,
tot_lower = fit_lower * total_area_m2,
tot_upper = fit_upper * total_area_m2) %>%
mutate(Type = factor(Type, levels = type_levels)) %>%
select(dataset, Type, ImpactZone, tot_estimate, tot_lower, tot_upper)
# Define ordered impact zone levels
impact_levels <- c("Channel", "Side Slopes", "Scenario 2, > 10 cm", "Scenario 2, 5.1-10 cm",
"Scenario 4, 1.1-5 cm", "Scenario 4, 0.51-1 cm", "Scenario 4, 0.1-0.5 cm")
# Ensure correct factor order
totals <- totals %>%
mutate(ImpactZone = factor(ImpactZone, levels = impact_levels))
# Step 1: Compute cumulative totals across impact zones
cumulative_zones_long <- seq_along(impact_levels) %>%
map_dfr(function(i) {
zone_subset <- impact_levels[1:i]
totals %>%
drop_na(dataset) %>%
filter(ImpactZone %in% zone_subset) %>%
group_by(dataset, Type) %>%
summarize(
tot = sum(tot_estimate),
lower = sum(tot_lower),
upper = sum(tot_upper),
.groups = "drop"
) %>%
mutate(zone_group = paste0("z", i))
})
# Step 2: Add missing combinations as NA rows
cumulative_complete <- cumulative_zones_long %>%
filter(zone_group == "z7") %>%
complete(dataset, Type, fill = list(tot = 0, lower = NA, upper = NA))
# Optional: factor levels for plotting order
cumulative_complete <- cumulative_complete %>%
mutate(
dataset = factor(dataset, levels = dataset_levels),
Type = factor(Type, levels = type_levels) # if you have custom ordering
)
# Step 3: Plot
p1 <- ggplot(cumulative_complete, aes(x = Type, y = tot, fill = dataset)) +
geom_col(position = position_dodge(width = 0.9)) +
geom_errorbar(aes(ymin = lower, ymax = upper),
position = position_dodge(width = 0.9), width = 0.2) +
scale_fill_discrete(labels = dataset_labels) +
labs(x = "Habitat Type",
y = "Total corals",
title = "Total corals in all impact zones, by habitat") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
# count sites per dataset/habitat type
nsites <- dff %>%
distinct(dataset, site, Type) %>%
count(Type, dataset) %>%
complete(Type, dataset, fill = list(n = 0)) %>%
mutate(dataset = factor(dataset, levels = dataset_levels))
p1 + geom_text(
data = nsites,
aes(x = Type, y = 0, label = n, group = dataset),
position = position_dodge(width = 0.9),
vjust = -1, size = 3
)
p2 <- cumulative_complete %>%
group_by(dataset) %>%
summarize(value = sum(tot, na.rm = T),
lower = sum(lower, na.rm = T),
upper = sum(upper, na.rm = T)) %>%
mutate(dataset = factor(dataset, levels = dataset_levels)) %>%
ggplot(aes(x = 1, y = value, fill = dataset)) +
geom_col(position = position_dodge()) +
geom_errorbar(aes(ymin = lower, ymax = upper),
position = position_dodge(width = 0.9), width = 0.2) +
scale_fill_discrete(labels = dataset_labels) +
labs(x = "", y = "Total corals", title = "Total corals in all impact zones - all habitats")
nsitest <- nsites %>%
group_by(dataset) %>%
summarize(n = sum(n))
p2 + geom_text(
data = nsitest,
aes(x = 1, y = 0, label = n, group = dataset),
position = position_dodge(width = 0.9),
vjust = -1, size = 3
)